AOMedia AV1 Codec
nonrd_opt.h
1/*
2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12#ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13#define AOM_AV1_ENCODER_NONRD_OPT_H_
14
15#include "av1/encoder/context_tree.h"
16#include "av1/encoder/rdopt_utils.h"
17#include "av1/encoder/rdopt.h"
18
// Sizes of the mode dimension used by the per-reference tables in this header
// (e.g. mode_idx[REF_FRAMES][RTC_MODES]).
#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
// Scales an RD cost by 7/8 (multiply by 7, shift right by 3). The expansion is
// fully parenthesized so the macro composes correctly inside larger
// expressions such as CALC_BIASED_RDCOST(a) + b.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)
// Entry counts of comp_ref_mode_set[] / comp_ref_mode_set_full[].
#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_COMP_INTER_MODES_RT_FULL (10)
// Entry counts of ref_mode_set[] / ref_mode_set_full[].
#define NUM_INTER_MODES 12
#define NUM_INTER_MODES_FULL 28
// True when the transform-mode search type is not ONLY_4X4 and bsize compares
// greater than BLOCK_32X32.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif

// Defined in another translation unit.
// NOTE(review): presumably a debug counter of pick-inter-mode calls - confirm.
extern int g_pick_inter_mode_cnt;
// Descriptor for one prediction buffer slot. `in_use` is the occupancy flag:
// get_pred_buffer() sets it to 1 when handing the slot out and
// free_pred_buffer() resets it to 0.
37typedef struct {
38 uint8_t *data;  // NOTE(review): presumably the first sample of the buffer - confirm.
39 int stride;  // NOTE(review): presumably the row pitch of `data` - confirm.
40 int in_use;  // Nonzero while the slot is claimed.
41} PRED_BUFFER;
42
// Holds best inter mode data; this is the type of the `best_pickmode` member
// of the non-rd inter mode search state declared later in this header.
// NOTE(review): per-field meanings below are read off the field names and
// types only - confirm against the code that fills them in.
43typedef struct {
44 PRED_BUFFER *best_pred;  // Prediction buffer slot associated with the best mode.
45 PREDICTION_MODE best_mode;
46 TX_SIZE best_tx_size;
47 TX_TYPE tx_type;
48 MV_REFERENCE_FRAME best_ref_frame;
49 MV_REFERENCE_FRAME best_second_ref_frame;
50 uint8_t best_mode_skip_txfm;
51 uint8_t best_mode_initial_skip_flag;
52 int_interpfilters best_pred_filter;
53 MOTION_MODE best_motion_mode;
54 WarpedMotionParams wm_params;
55 int num_proj_ref;
56 PALETTE_MODE_INFO pmi;
57 int64_t best_sse;
58} BEST_PICKMODE;
59
// One (reference frame, prediction mode) pair. Element type of ref_mode_set[]
// and ref_mode_set_full[], which use positional initialisers in this field
// order.
60typedef struct {
61 MV_REFERENCE_FRAME ref_frame;
62 PREDICTION_MODE pred_mode;
63} REF_MODE;
64
// Two reference frames plus one prediction mode. Element type of
// comp_ref_mode_set[] and comp_ref_mode_set_full[], which use positional
// initialisers in this field order.
65typedef struct {
66 MV_REFERENCE_FRAME ref_frame[2];
67 PREDICTION_MODE pred_mode;
68} COMP_REF_MODE;
69
// Argument bundle whose defaults are set by init_estimate_block_intra_args().
// NOTE(review): presumably handed to av1_estimate_block_intra() through its
// `void *arg` parameter - confirm against the caller.
70struct estimate_block_intra_args {
71 AV1_COMP *cpi;
72 MACROBLOCK *x;
73 PREDICTION_MODE mode;  // Initialised to DC_PRED.
74 int skippable;  // Initialised to 1.
75 RD_STATS *rdc;  // Initialised to a null pointer.
76 unsigned int best_sad;  // Initialised to UINT_MAX.
77 bool prune_mode_based_on_sad;  // Initialised to false.
78 bool prune_palette_sad;  // Initialised to false.
79};
81
//! Structure to store parameters and statistics used in non-rd inter mode
//! evaluation.
// NOTE(review): the closing line of this typedef (listing line 112, carrying
// the type name) is absent from this listing - restore it from upstream.
85typedef struct {
//! Structure to hold best inter mode data.
87 BEST_PICKMODE best_pickmode;
//! RD cost of the current mode.
89 RD_STATS this_rdc;
//! RD cost of the best mode found so far (held by value).
91 RD_STATS best_rdc;
//! Distortion of chroma planes for all modes and reference frames.
93 int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
//! Buffer to hold predicted block for all reference frames and planes.
95 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
//! Array to hold variance of all modes and reference frames.
97 unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
//! Array to hold ref cost of single reference mode for all ref frames.
99 unsigned int ref_costs_single[REF_FRAMES];
//! Array to hold motion vector for all modes and reference frames.
101 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
//! Array to hold best mv for all modes and reference frames.
103 int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
//! Array to hold inter mode cost of single ref mode for all ref frames.
105 int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
//! Array to hold use reference frame mask for each reference frame.
107 int use_ref_frame_mask[REF_FRAMES];
//! Array to hold flags of evaluated modes for each reference frame.
109 uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
//! Array to hold flag indicating if scaled reference frame is used.
111 bool use_scaled_ref_frame[REF_FRAMES];
113
113
// Lookup with BLOCK_SIZES entries (16 initialisers listed).
// NOTE(review): the name suggests log2 of the block width, indexed by
// BLOCK_SIZE; the unit (samples vs. 4-sample columns) is not visible here -
// confirm.
114static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
115 2, 2, 3, 3, 3, 4,
116 4, 4, 5, 5 };
// Lookup with BLOCK_SIZES entries (16 initialisers listed).
// NOTE(review): the name suggests log2 of the block height, indexed by
// BLOCK_SIZE; the unit (samples vs. 4-sample rows) is not visible here -
// confirm.
117static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
118 2, 3, 2, 3, 4, 3,
119 4, 5, 4, 5 };
120
// The four intra prediction modes listed for the real-time search, in the same
// order as the THR_* entries of row 0 of mode_idx (DC, V, H, SMOOTH).
121static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
122 SMOOTH_PRED };
123
// The four single-reference inter prediction modes, in the same order as the
// THR_* entries of the inter rows of mode_idx (NEAREST, NEAR, GLOBAL, NEW).
124static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
125 NEWMV };
126
// THR_MODES index for each (row, mode slot) pair. Row 0 holds the intra
// entries; each following row holds the NEAREST/NEAR/GLOBAL/NEW entries of one
// reference frame.
// NOTE(review): rows are presumably indexed by MV_REFERENCE_FRAME (INTRA,
// LAST, LAST2, LAST3, GOLDEN, BWDREF, ALTREF2, ALTREF) - confirm the enum
// order.
127static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
128 { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
129 { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
130 { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
131 { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
132 { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
133 { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
134 { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
135 { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
136};
137
138// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
139// mode
// 12 entries: NEARESTMV, NEARMV, GLOBALMV, NEWMV for each of LAST_FRAME,
// GOLDEN_FRAME and ALTREF_FRAME, in that order.
140static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
141 { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
142 { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
143 { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
144 { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
145 { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
146 { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
147};
148
// 28 entries: the same 12 entries as ref_mode_set (LAST, GOLDEN, ALTREF)
// followed by the four modes for LAST2, LAST3, BWDREF and ALTREF2.
149static const REF_MODE ref_mode_set_full[NUM_INTER_MODES_FULL] = {
150 { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
151 { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
152 { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
153 { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
154 { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
155 { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
156 { LAST2_FRAME, NEARESTMV }, { LAST2_FRAME, NEARMV },
157 { LAST2_FRAME, GLOBALMV }, { LAST2_FRAME, NEWMV },
158 { LAST3_FRAME, NEARESTMV }, { LAST3_FRAME, NEARMV },
159 { LAST3_FRAME, GLOBALMV }, { LAST3_FRAME, NEWMV },
160 { BWDREF_FRAME, NEARESTMV }, { BWDREF_FRAME, NEARMV },
161 { BWDREF_FRAME, GLOBALMV }, { BWDREF_FRAME, NEWMV },
162 { ALTREF2_FRAME, NEARESTMV }, { ALTREF2_FRAME, NEARMV },
163 { ALTREF2_FRAME, GLOBALMV }, { ALTREF2_FRAME, NEWMV },
164};
165
// 6 compound entries: GLOBAL_GLOBALMV and NEAREST_NEARESTMV for LAST_FRAME
// paired with GOLDEN_FRAME, LAST2_FRAME and ALTREF_FRAME.
166static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
167 { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
168 { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
169 { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
170 { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
171 { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
172 { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
173};
174
// 10 compound entries: the same 6 entries as comp_ref_mode_set followed by
// the two modes for LAST_FRAME paired with BWDREF_FRAME and ALTREF2_FRAME.
175static const COMP_REF_MODE
176 comp_ref_mode_set_full[NUM_COMP_INTER_MODES_RT_FULL] = {
177 { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
178 { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
179 { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
180 { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
181 { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
182 { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
183 { { LAST_FRAME, BWDREF_FRAME }, GLOBAL_GLOBALMV },
184 { { LAST_FRAME, BWDREF_FRAME }, NEAREST_NEARESTMV },
185 { { LAST_FRAME, ALTREF2_FRAME }, GLOBAL_GLOBALMV },
186 { { LAST_FRAME, ALTREF2_FRAME }, NEAREST_NEARESTMV },
187 };
188
// All nine ordered pairs drawn from { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH,
// MULTITAP_SHARP }, set through the `as_filters` member.
// NOTE(review): which element of each pair is the horizontal and which the
// vertical filter depends on the as_filters field order, not visible here.
189static const int_interpfilters filters_ref_set[9] = {
190 [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
191 [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
192 [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
193 [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
194 [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
195 [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
196 [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
197 [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
198 [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
199};
200
// Bit masks over PREDICTION_MODE values: each constant ORs together
// (1 << mode) for the modes named in the identifier.
201enum {
202 // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
203 INTER_NEAREST = (1 << NEARESTMV),
204 INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
205 INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
206 INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
207};
208
209// The original scan order (default_scan_8x8) is modified according to the extra
210// transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
211// aom_hadamard_8x8_c.
// 64 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to be a permutation of 0..63 whose inverse is
// av1_default_iscan_8x8_transpose - not verified entry by entry here.
212DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
213 0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
214 33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
215 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
216 23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
217};
218
219// The original scan order (av1_default_iscan_8x8) is modified to match
220// hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
221// aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
222// order of coefficients, such that the normal scan order is no longer
223// guaranteed to scan low coefficients first, therefore we modify the scan order
224// accordingly.
225// Note that this one has to be used together with default_scan_8x8_transpose.
// 64 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to map a coefficient position to its index in
// default_scan_8x8_transpose - not verified entry by entry here.
226DECLARE_ALIGNED(16, static const int16_t,
227 av1_default_iscan_8x8_transpose[64]) = {
228 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
229 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
230 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
231 27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
232};
233
234// The original scan order (default_scan_16x16) is modified according to the
235// extra transpose in hadamard c implementation in lp case, i.e.,
236// aom_hadamard_lp_16x16_c.
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to be a permutation of 0..255 paired with
// av1_default_iscan_lp_16x16_transpose - not verified entry by entry here.
237DECLARE_ALIGNED(16, static const int16_t,
238 default_scan_lp_16x16_transpose[256]) = {
239 0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
240 40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
241 44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
242 9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
243 106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
244 146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
245 33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
246 196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
247 57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
248 198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
249 59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
250 222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
251 109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
252 149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
253 246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
254 211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
255 215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
256 255
257};
258
259#if CONFIG_AV1_HIGHBITDEPTH
260// The original scan order (default_scan_16x16) is modified according to the
261// extra shift in hadamard c implementation in fp case, i.e.,
262// aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
263// outputs, so we handle them separately.
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...); compiled
// only when CONFIG_AV1_HIGHBITDEPTH is nonzero (enclosing #if).
// NOTE(review): expected to be a permutation of 0..255 paired with
// av1_default_iscan_fp_16x16_transpose - not verified entry by entry here.
264DECLARE_ALIGNED(16, static const int16_t,
265 default_scan_fp_16x16_transpose[256]) = {
266 0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
267 36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
268 44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
269 5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
270 102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
271 146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
272 33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
273 200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
274 53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
275 202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
276 55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
277 222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
278 109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
279 153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
280 250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
281 211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
282 219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
283 255
284};
285#endif
286
287// The original scan order (av1_default_iscan_16x16) is modified to match
288// hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
289// Since hadamard AVX2 implementation will modify the order of coefficients,
290// such that the normal scan order is no longer guaranteed to scan low
291// coefficients first, therefore we modify the scan order accordingly. Note that
292// this one has to be used together with default_scan_lp_16x16_transpose.
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to be a permutation of 0..255 - not verified entry
// by entry here.
293DECLARE_ALIGNED(16, static const int16_t,
294 av1_default_iscan_lp_16x16_transpose[256]) = {
295 0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
296 87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
297 24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
298 122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
299 47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
300 114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
301 39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
302 153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
303 70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
304 185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
305 102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
306 216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
307 141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
308 208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
309 133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
310 231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
311 168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
312 255
313};
314
315#if CONFIG_AV1_HIGHBITDEPTH
316// The original scan order (av1_default_iscan_16x16) is modified to match
317// hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
318// Since hadamard AVX2 implementation will modify the order of coefficients,
319// such that the normal scan order is no longer guaranteed to scan low
320// coefficients first, therefore we modify the scan order accordingly. Note that
321// this one has to be used together with default_scan_fp_16x16_transpose.
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...); compiled
// only when CONFIG_AV1_HIGHBITDEPTH is nonzero (enclosing #if).
// NOTE(review): expected to be a permutation of 0..255 - not verified entry
// by entry here.
322DECLARE_ALIGNED(16, static const int16_t,
323 av1_default_iscan_fp_16x16_transpose[256]) = {
324 0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
325 87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
326 24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
327 122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
328 47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
329 127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
330 52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
331 174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
332 84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
333 171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
334 81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
335 203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
336 128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
337 208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
338 133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
339 231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
340 168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
341 255
342};
343#endif
344
345// For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
346// but the fastest way to calculate the IDTX transform (i.e. no transposes)
347// results in coefficients that are a transposition of the entropy coding
348// versions. These tables are used as substitute for the scan order for the
349// faster version of IDTX.
350
351// Must be used together with av1_fast_idtx_iscan_4x4
// Permutation of 0..15. av1_fast_idtx_iscan_4x4 is its inverse:
// av1_fast_idtx_iscan_4x4[av1_fast_idtx_scan_4x4[i]] == i for every i.
352DECLARE_ALIGNED(16, static const int16_t,
353 av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
354 9, 12, 13, 10, 7, 11, 14, 15 };
355
356// Must be used together with av1_fast_idtx_scan_4x4
// Permutation of 0..15; the inverse of av1_fast_idtx_scan_4x4, i.e.
// av1_fast_idtx_scan_4x4[av1_fast_idtx_iscan_4x4[p]] == p for every p.
357DECLARE_ALIGNED(16, static const int16_t,
358 av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
359 3, 8, 11, 13, 9, 10, 14, 15 };
360
// Bundles the 4x4 fast-IDTX scan table with its inverse, using positional
// initialisers (scan table first, inverse table second).
// NOTE(review): the SCAN_ORDER field order is not visible here - confirm it
// matches this initialiser order.
361static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
362 av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
363};
364
365// Must be used together with av1_fast_idtx_iscan_8x8
// 64 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to be a permutation of 0..63 whose inverse is
// av1_fast_idtx_iscan_8x8 - not verified entry by entry here.
366DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
367 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
368 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
369 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
370 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
371};
372
373// Must be used together with av1_fast_idtx_scan_8x8
// 64 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to map a coefficient position to its index in
// av1_fast_idtx_scan_8x8 - not verified entry by entry here.
374DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
375 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
376 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
377 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
378 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
379};
380
// Bundles the 8x8 fast-IDTX scan table with its inverse, using positional
// initialisers (scan table first, inverse table second).
// NOTE(review): the SCAN_ORDER field order is not visible here - confirm it
// matches this initialiser order.
381static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
382 av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
383};
384
385// Must be used together with av1_fast_idtx_iscan_16x16
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to be a permutation of 0..255 whose inverse is
// av1_fast_idtx_iscan_16x16 - not verified entry by entry here.
386DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
387 0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
388 5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
389 37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
390 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
391 85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
392 146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
393 12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
394 224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
395 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
396 240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
397 31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
398 243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
399 109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
400 170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
401 246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
402 218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
403 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
404 255
405};
406
407// Must be used together with av1_fast_idtx_scan_16x16
// 256 int16_t entries declared through DECLARE_ALIGNED(16, ...).
// NOTE(review): expected to map a coefficient position to its index in
// av1_fast_idtx_scan_16x16 - not verified entry by entry here.
408DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
409 0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
410 120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
411 121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
412 122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
413 123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
414 124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
415 125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
416 126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
417 127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
418 128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
419 129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
420 130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
421 131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
422 132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
423 133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
424 134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
425 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
426 255
427};
428
429// Indicates the blocks for which RD model should be based on special logic
430static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
431 BLOCK_SIZE bsize) {
432 const AV1_COMMON *const cm = &cpi->common;
433 const int large_block = bsize >= BLOCK_32X32;
434 // Only enable for low bitdepth to mitigate issue: b/303023614.
435 return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
436 !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
437 cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
438}
464static inline void find_predictors(
465 AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
466 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
467 struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
468 int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
469 AV1_COMMON *const cm = &cpi->common;
470 MACROBLOCKD *const xd = &x->e_mbd;
471 MB_MODE_INFO *const mbmi = xd->mi[0];
472 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
473 const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
474 const bool ref_is_scaled =
475 ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
476 const YV12_BUFFER_CONFIG *scaled_ref =
477 av1_get_scaled_ref_frame(cpi, ref_frame);
478 const YV12_BUFFER_CONFIG *yv12 =
479 ref_is_scaled && scaled_ref ? scaled_ref : ref;
480 const int num_planes = av1_num_planes(cm);
481 x->pred_mv_sad[ref_frame] = INT_MAX;
482 x->pred_mv0_sad[ref_frame] = INT_MAX;
483 x->pred_mv1_sad[ref_frame] = INT_MAX;
484 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
485 // TODO(kyslov) this needs various further optimizations. to be continued..
486 assert(yv12 != NULL);
487 if (yv12 != NULL) {
488 struct scale_factors *const sf =
489 scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
490 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
491 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
492 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
493 mbmi_ext->mode_context);
494 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
495 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
496 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
497 av1_find_best_ref_mvs_from_stack(
498 cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
499 &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
500 frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
501 // Early exit for non-LAST frame if force_skip_low_temp_var is set.
502 if (!is_one_pass_rt_lag_params(cpi) && !ref_is_scaled &&
503 bsize >= BLOCK_8X8 && !skip_pred_mv &&
504 !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
505 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
506 bsize);
507 }
508 }
510 av1_count_overlappable_neighbors(cm, xd);
511 }
512 mbmi->num_proj_ref = 1;
513 *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
514}
515
516static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
517 PREDICTION_MODE pred_mode,
518 MV_REFERENCE_FRAME ref_frame0,
519 MV_REFERENCE_FRAME ref_frame1,
520 const AV1_COMMON *cm) {
521 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
522 mbmi->ref_mv_idx = 0;
523 mbmi->mode = pred_mode;
524 mbmi->uv_mode = UV_DC_PRED;
525 mbmi->ref_frame[0] = ref_frame0;
526 mbmi->ref_frame[1] = ref_frame1;
527 pmi->palette_size[PLANE_TYPE_Y] = 0;
528 pmi->palette_size[PLANE_TYPE_UV] = 0;
529 mbmi->filter_intra_mode_info.use_filter_intra = 0;
530 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
531 mbmi->motion_mode = SIMPLE_TRANSLATION;
532 mbmi->num_proj_ref = 1;
533 mbmi->interintra_mode = 0;
534 set_default_interp_filters(mbmi, cm->features.interp_filter);
535}
536
537static inline void init_estimate_block_intra_args(
538 struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
539 args->cpi = cpi;
540 args->x = x;
541 args->mode = DC_PRED;
542 args->skippable = 1;
543 args->rdc = 0;
544 args->best_sad = UINT_MAX;
545 args->prune_mode_based_on_sad = false;
546 args->prune_palette_sad = false;
547}
548
549static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
550 for (int buf_idx = 0; buf_idx < len; buf_idx++) {
551 if (!p[buf_idx].in_use) {
552 p[buf_idx].in_use = 1;
553 return buf_idx;
554 }
555 }
556 return -1;
557}
558
559static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
560 unsigned int source_variance) {
561 return (
562 cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
563 cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
564 ((cpi->sf.rt_sf.prune_palette_search_nonrd > 2) ||
565 (cpi->sf.rt_sf.rc_compute_spatial_var_sc_kf &&
566 cpi->rc.frame_spatial_variance < 1200 &&
567 cpi->rc.perc_spatial_flat_blocks < 5 &&
568 cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000)));
569}
570
571static inline void free_pred_buffer(PRED_BUFFER *p) {
572 if (p != NULL) p->in_use = 0;
573}
574
575#if CONFIG_INTERNAL_STATS
576static inline void store_coding_context_nonrd(MACROBLOCK *x,
577 PICK_MODE_CONTEXT *ctx,
578 int mode_index) {
579#else
580static inline void store_coding_context_nonrd(MACROBLOCK *x,
581 PICK_MODE_CONTEXT *ctx) {
582#endif // CONFIG_INTERNAL_STATS
583 MACROBLOCKD *const xd = &x->e_mbd;
584 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
585
586 // Take a snapshot of the coding context so it can be
587 // restored if we decide to encode this way
588 ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
589
590 ctx->skippable = txfm_info->skip_txfm;
591#if CONFIG_INTERNAL_STATS
592 ctx->best_mode_index = mode_index;
593#endif // CONFIG_INTERNAL_STATS
594 ctx->mic = *xd->mi[0];
595 ctx->skippable = txfm_info->skip_txfm;
596 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
597 av1_ref_frame_type(xd->mi[0]->ref_frame));
598}
599
// Prototypes only; the definitions are not part of this header.
// NOTE(review): the one-line purposes below are inferred from the names and
// parameter lists - confirm against the definitions.

// Presumably computes luma RD stats for a block into *this_rdc and reports
// skippability through *skippable.
600void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
601 BLOCK_SIZE bsize, TX_SIZE tx_size);
602
// Variant of av1_block_yrd that additionally takes a prediction buffer and its
// stride; presumably the identity-transform (IDTX) path.
603void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
604 int pred_stride, RD_STATS *this_rdc, int *skippable,
605 BLOCK_SIZE bsize, TX_SIZE tx_size);
606
// Takes a plane range [start_plane, stop_plane] and returns an int64_t;
// presumably a model-based RD estimate for the chroma planes.
607int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
608 MACROBLOCK *x, MACROBLOCKD *xd,
609 RD_STATS *this_rdc, int start_plane,
610 int stop_plane);
611
// Per-transform-block callback shape (plane, block, row, col, sizes, opaque
// arg); presumably `arg` points at a struct estimate_block_intra_args.
612void av1_estimate_block_intra(int plane, int block, int row, int col,
613 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
614 void *arg);
615
// Presumably evaluates intra modes for the block and updates *best_rdc,
// *best_pickmode and *best_sad_norm.
616void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
617 int best_early_term, unsigned int ref_cost_intra,
618 int reuse_prediction, struct buf_2d *orig_dst,
619 PRED_BUFFER *tmp_buffers,
620 PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
621 BEST_PICKMODE *best_pickmode,
622 unsigned int *best_sad_norm);
623
624#endif // AOM_AV1_ENCODER_NONRD_OPT_H_
struct macroblock MACROBLOCK
Encoder's parameters related to the current coding block.
struct AV1_COMP AV1_COMP
Top level encoder structure.
@ AOM_CBR
Definition aom_encoder.h:187
static void find_predictors(AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], struct buf_2d yv12_mb[8][3], BLOCK_SIZE bsize, int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame)
Finds predicted motion vectors for a block.
Definition nonrd_opt.h:464
int width
Definition av1_common_int.h:791
FeatureFlags features
Definition av1_common_int.h:921
CommonQuantParams quant_params
Definition av1_common_int.h:938
int height
Definition av1_common_int.h:792
RateControlCfg rc_cfg
Definition encoder.h:956
Top level encoder structure.
Definition encoder.h:2897
RATE_CONTROL rc
Definition encoder.h:3104
SPEED_FEATURES sf
Definition encoder.h:3124
AV1EncoderConfig oxcf
Definition encoder.h:2945
AV1_COMMON common
Definition encoder.h:2940
int base_qindex
Definition av1_common_int.h:623
InterpFilter interp_filter
Definition av1_common_int.h:417
bool switchable_motion_mode
Definition av1_common_int.h:415
bool allow_high_precision_mv
Definition av1_common_int.h:377
Structure to store parameters and statistics used in non-rd inter mode evaluation.
Definition nonrd_opt.h:85
int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES]
Array to hold motion vector for all modes and reference frames.
Definition nonrd_opt.h:101
unsigned int ref_costs_single[REF_FRAMES]
Array to hold ref cost of single reference mode for all ref frames.
Definition nonrd_opt.h:99
bool use_scaled_ref_frame[REF_FRAMES]
Array to hold flag indicating if scaled reference frame is used.
Definition nonrd_opt.h:111
int64_t uv_dist[(4)][REF_FRAMES]
Distortion of chroma planes for all modes and reference frames.
Definition nonrd_opt.h:93
RD_STATS this_rdc
Structure to hold the RD cost of the current mode.
Definition nonrd_opt.h:89
unsigned int vars[(4)][REF_FRAMES]
Array to hold variance of all modes and reference frames.
Definition nonrd_opt.h:97
uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES]
Array to hold flags of evaluated modes for each reference frame.
Definition nonrd_opt.h:109
int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES]
Array to hold best mv for all modes and reference frames.
Definition nonrd_opt.h:103
int single_inter_mode_costs[(4)][REF_FRAMES]
Array to hold inter mode cost of single ref mode for all ref frames.
Definition nonrd_opt.h:105
RD_STATS best_rdc
Pointer to the RD Cost for the best mode found so far.
Definition nonrd_opt.h:91
int use_ref_frame_mask[REF_FRAMES]
Array to hold use reference frame mask for each reference frame.
Definition nonrd_opt.h:107
BEST_PICKMODE best_pickmode
Structure to hold best inter mode data.
Definition nonrd_opt.h:87
struct buf_2d yv12_mb[REF_FRAMES][3]
Buffer to hold predicted block for all reference frames and planes.
Definition nonrd_opt.h:95
Extended mode info derived from mbmi.
Definition block.h:225
int_mv global_mvs[REF_FRAMES]
Global mvs.
Definition block.h:234
int16_t mode_context[MODE_CTX_REF_FRAMES]
Context used to encode the current mode.
Definition block.h:236
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]
Number of ref mvs in the drl.
Definition block.h:232
Stores the prediction/txfm mode of the current coding block.
Definition blockd.h:222
int_mv mv[2]
The motion vectors used by the current inter mode.
Definition blockd.h:244
PREDICTION_MODE mode
The prediction mode used.
Definition blockd.h:232
UV_PREDICTION_MODE uv_mode
The UV mode when intra is used.
Definition blockd.h:234
PALETTE_MODE_INFO palette_mode_info
Stores the size and colors of palette mode.
Definition blockd.h:280
uint8_t segment_id
The segment id.
Definition blockd.h:310
uint8_t ref_mv_idx
Which ref_mv to use.
Definition blockd.h:314
MV_REFERENCE_FRAME ref_frame[2]
The reference frames for the MV.
Definition blockd.h:246
FILTER_INTRA_MODE_INFO filter_intra_mode_info
The type of filter intra mode used (if applicable).
Definition blockd.h:274
MOTION_MODE motion_mode
The motion mode used by the inter prediction.
Definition blockd.h:250
uint8_t num_proj_ref
Number of samples used by warp causal.
Definition blockd.h:252
INTERINTRA_MODE interintra_mode
The type of intra mode used by inter-intra.
Definition blockd.h:259
enum aom_rc_mode mode
Definition encoder.h:598
REAL_TIME_SPEED_FEATURES rt_sf
Definition speed_features.h:2156
Stores various encoding/search decisions related to txfm search.
Definition block.h:534
uint8_t skip_txfm
Whether to skip transform and quantization on a partition block level.
Definition block.h:536
MACROBLOCKD e_mbd
Decoder's view of current coding block.
Definition block.h:907
int pred_mv1_sad[REF_FRAMES]
The sad of the 2nd mv ref (near).
Definition block.h:1131
int pred_mv0_sad[REF_FRAMES]
The sad of the 1st mv ref (nearest).
Definition block.h:1129
TxfmSearchInfo txfm_search_info
Results of the txfm searches that have been done.
Definition block.h:1327
int pred_mv_sad[REF_FRAMES]
Sum absolute distortion of the predicted mv for each ref frame.
Definition block.h:1121
MB_MODE_INFO_EXT mbmi_ext
Derived coding information.
Definition block.h:914
uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition blockd.h:781
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition blockd.h:776
MB_MODE_INFO ** mi
Definition blockd.h:617